library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
theme_set(theme_minimal())
# social vulnerability data
CDC_SVI_data <- read.csv('data/2018_CDC_SVI.csv')
# -999 means missing data in this data set
CDC_SVI_data <- CDC_SVI_data %>% filter(E_TOTPOP != 0 & E_CROWD != -999 & E_POV != -999 && E_HU != -999 & EP_POV != -999 & EP_CROWD != -999)
# Urban and rural county classification data from: https://www.cdc.gov/nchs/data_access/urban_rural.htm
urban_rural_data <- read.csv('data/NCHSURCodes2013.csv')
urban_rural_data <- urban_rural_data %>% mutate(FIPS = ï..FIPS.code)
# join urban/rural county designations with the CDC data
CDC_SVI_data <- left_join(CDC_SVI_data , urban_rural_data, by = 'FIPS')
# group crowding counts, household counts, and population counts by state
CDC_SVI_state <- CDC_SVI_data %>% group_by(ST_ABBR) %>% summarise(crowd = sum(E_CROWD), households = sum(E_HU), pop = sum(E_TOTPOP), poverty = sum(E_POV))
## `summarise()` ungrouping output (override with `.groups` argument)
CDC_SVI_state
## # A tibble: 52 x 5
## ST_ABBR crowd households pop poverty
## <chr> <int> <int> <int> <int>
## 1 AK 16847 315386 738516 77865
## 2 AL 32038 2244462 4864680 829400
## 3 AR 30795 1362040 2990671 510337
## 4 AZ 113062 2970935 6946685 1092192
## 5 CA 1065030 14084824 39148760 5487141
## 6 CO 55538 2352202 5531141 590504
## 7 CT 25400 1512305 3581504 348449
## 8 DC 10155 311545 684498 109497
## 9 DE 5900 428251 949495 109798
## 10 FL 229353 9348689 20598139 2983851
## # ... with 42 more rows
# Point in time counts of homeless individuals for each individual
# shelter that performs a count
homeless_data <- read.csv("data/2007-2019-PIT-Counts-by-CoC.csv")
# get the state abbreviation from the shelter id, rename the 2019 count column
homeless_data <- homeless_data %>% mutate('ST_ABBR' = substring(homeless_data$CoC.Number,0,2)) %>% mutate(homeless_count = parse_number(Overall.Homeless..2019))
# Sum the homeless counts by state
homeless_data <- homeless_data %>% select(ST_ABBR, homeless_count) %>% group_by(ST_ABBR) %>% summarise(homeless_counts = sum(homeless_count))
## `summarise()` ungrouping output (override with `.groups` argument)
# join homeless counts with CDC data
CDC_SVI_state <- left_join(CDC_SVI_state, homeless_data, by = 'ST_ABBR')
# get crowding and homeless counts as proportions
CDC_SVI_state <- CDC_SVI_state %>% mutate('crowd_percent' = crowd/households * 100) %>% mutate('pov_percentage' = poverty/pop * 100)
CDC_SVI_state <- CDC_SVI_state %>% mutate('homeless_percent' = homeless_counts/pop * 100)
CDC_SVI_state
## # A tibble: 52 x 9
## ST_ABBR crowd households pop poverty homeless_counts crowd_percent
## <chr> <int> <int> <int> <int> <dbl> <dbl>
## 1 AK 1.68e4 315386 7.39e5 77865 1907 5.34
## 2 AL 3.20e4 2244462 4.86e6 829400 3261 1.43
## 3 AR 3.08e4 1362040 2.99e6 510337 2717 2.26
## 4 AZ 1.13e5 2970935 6.95e6 1092192 10007 3.81
## 5 CA 1.07e6 14084824 3.91e7 5487141 151278 7.56
## 6 CO 5.55e4 2352202 5.53e6 590504 9619 2.36
## 7 CT 2.54e4 1512305 3.58e6 348449 3033 1.68
## 8 DC 1.02e4 311545 6.84e5 109497 6521 3.26
## 9 DE 5.90e3 428251 9.49e5 109798 921 1.38
## 10 FL 2.29e5 9348689 2.06e7 2983851 28328 2.45
## # ... with 42 more rows, and 2 more variables: pov_percentage <dbl>,
## # homeless_percent <dbl>
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
library(htmlwidgets)
CDC_SVI_state <- CDC_SVI_state %>% mutate('State' = ST_ABBR)
# plot homeless & crowding data
homeless_v_crowd <- ggplot(CDC_SVI_state, aes(x=crowd_percent, y=homeless_percent, label = State)) + geom_point() + geom_smooth(method ='lm') + ggtitle("Homeless % vs. Crowding % by US State") +
xlab("Percentage Crowded Households") + ylab("Percentage Homeless Population")
ggsave("homelessness_v_crowding.jpg")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
p <- ggplotly(homeless_v_crowd, tooltip = 'State')
## `geom_smooth()` using formula 'y ~ x'
p
htmlwidgets::saveWidget(as_widget(p), "HomelessAndCrowding.html")
CDC_SVI_state %>% select("crowd_percent", "homeless_percent")%>% summary()
## crowd_percent homeless_percent
## Min. :1.136 Min. :0.03962
## 1st Qu.:1.559 1st Qu.:0.08456
## Median :1.929 Median :0.10104
## Mean :2.379 Mean :0.15574
## 3rd Qu.:2.540 3rd Qu.:0.14770
## Max. :7.676 Max. :0.95267
CDC_SVI_data1 <- CDC_SVI_data %>% filter( !is.na(X2013.code))
CDC_SVI_data2 <- CDC_SVI_data1 %>% filter((EP_POV >= 0) & (EP_POV < 35) & (EP_CROWD<8))
poverty_vs_crowd2 <- ggplot(CDC_SVI_data2, aes(x=EP_CROWD, y=EP_POV,color=as.character(X2013.code), alpha = 0.1)) + geom_point() + geom_smooth(method ='lm', colour="black")+ theme(legend.position = "none") + facet_wrap(~X2013.code) + ggtitle("Poverty vs. Crowding by US County, by Urban (1) to Rural (6)") +xlab("Percentage Crowded Living Conditions") + ylab("Percentage In Poverty")
poverty_vs_crowd2
## `geom_smooth()` using formula 'y ~ x'

ggsave("poverty_v_crowding.jpg")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
CDC_SVI_data <- CDC_SVI_data %>% filter(EP_CROWD < 10 & !is.na(X2013.code))
plot3 <- ggplot(CDC_SVI_data, aes(x = as.character(X2013.code), y = EP_CROWD, color =as.character(X2013.code))) + geom_boxplot() + theme(legend.position = "none") + ggtitle("Crowded Living Conditions in US Counties") +
xlab("Urban (1) to Rural (6)") + ylab("Percentage in Crowded Living Conditions")
plot3

ggsave("crowding_boxplot.jpg")
## Saving 7 x 5 in image
CDC_SVI_data %>% select("EP_CROWD", "EP_POV")%>% summary()
## EP_CROWD EP_POV
## Min. :0.000 Min. : 2.30
## 1st Qu.:1.200 1st Qu.:10.90
## Median :1.900 Median :14.70
## Mean :2.249 Mean :15.46
## 3rd Qu.:2.800 3rd Qu.:18.90
## Max. :9.800 Max. :49.70
http://rmarkdown.rstudio.com